from calendar import c
from msilib.schema import File
from operator import le
from traceback import print_tb
from turtle import ht
from wsgiref.util import FileWrapper
from xml.etree.ElementTree import QName
from pandas import qcut
from regex import F, R
import requests
from lxml import etree
import re
import time
import random
import sys
import io
# Re-wrap stdout's underlying byte buffer in a UTF-8 text wrapper so that
# everything printed below is encoded as UTF-8 instead of with the stream's
# previous encoding.
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf8')


class Cnki():
    """Scraper for the CNKI journal navigation list (CJFD, sorted by ComplexFactor).

    Each listing page is fetched with ``requests``, parsed with ``lxml`` and
    every journal row of the ``lblList`` table is written as one
    tab-separated line to a timestamped ``cnki<time>.csv`` file.
    """

    # Site root prepended to the relative journal links found in the table.
    BASE_URL = "https://r.cnki.net"
    # Seconds to wait for an HTTP response; without a timeout a stalled
    # connection would block the whole run indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self) -> None:
        # Listing URL template; ``%s`` is replaced with the page number.
        self.url = 'https://r.cnki.net/KNavi/journal/navi/CJFD?orderBy=ComplexFactor&sort=desc&page=%s&DisplayMode=1'

    @staticmethod
    def _first_text(values) -> str:
        """Return the first item of *values* stripped, or ``''`` if it is empty.

        XPath queries return an empty list for cells without text; indexing
        ``[0]`` directly would raise ``IndexError`` and abort the scrape.
        """
        return values[0].strip() if values else ''

    @staticmethod
    def _format_line(fields) -> str:
        """Join *fields* with tabs and drop characters GBK cannot represent.

        NOTE(review): the GBK round-trip presumably exists so the line can be
        written through a file handle whose default encoding is GBK (e.g. on
        Chinese-locale Windows) -- confirm before changing the file encoding.
        """
        return '\t'.join(fields).encode('gbk', 'ignore').decode('gbk')

    def _row_fields(self, row):
        """Extract the output fields from one table row.

        Returns a list ``[title, <text of every cell after the second>, url]``,
        or ``None`` when the row has fewer than two ``td`` cells or its second
        cell carries no link (i.e. it is not a journal row).
        """
        cells = row.xpath('td')
        if len(cells) < 2:
            return None
        title_cell = cells[1]
        hrefs = title_cell.xpath('a/@href')
        if not hrefs:
            return None
        fields = [self._first_text(title_cell.xpath('a/text()'))]
        fields.extend(self._first_text(cell.xpath('text()'))
                      for cell in cells[2:])
        fields.append(self.BASE_URL + hrefs[0].strip())
        return fields

    def down(self, start_page: int = 1, end_page: int = 304) -> None:
        """Download the listing pages and save their rows to a new file.

        Args:
            start_page: First page number to fetch (inclusive).
            end_page: Last page number to fetch (inclusive). The defaults
                reproduce the original fixed range of pages 1 to 304.

        Raises:
            requests.RequestException: If a page cannot be fetched (including
                when the server does not answer within ``REQUEST_TIMEOUT``).
        """
        with open('cnki%s.csv' % time.time(), "w+") as ff:
            for page in range(start_page, end_page + 1):
                response = requests.get(self.url % page,
                                        timeout=self.REQUEST_TIMEOUT)
                selector = etree.HTML(response.text)
                rows = selector.xpath('//*[@id="lblList"]/table/tr')
                print('第%s页' % page)
                for row in rows:
                    fields = self._row_fields(row)
                    if fields is None:
                        continue
                    line = self._format_line(fields)
                    print(line)
                    ff.write(line+'\n')
                # Push the finished page to disk so a later failure does not
                # lose it (the original called the no-op ``ff.writable()``).
                ff.flush()
                # Pause between pages to avoid hammering the server.
                time.sleep(1)


# Run the scrape only when executed as a script, so importing this module
# does not trigger network requests. The instance gets its own name instead
# of rebinding (and thereby shadowing) the ``Cnki`` class.
if __name__ == "__main__":
    scraper = Cnki()
    scraper.down()
